library(dplyr)
library(magrittr)
library(tidyr)
library(ggplot2)

# Load the old-cities data set from the Stata file.
dat <- rio::import("data/old cities.dta")

# Convert numeric codes to their value labels

# Build a lookup table from the "labels" attribute of `x`: column `num` holds
# the attribute's values and column `label_col` holds the row names that
# as.data.frame() gave the table.
build_label_table <- function(x, label_col) {
  lookup <- as.data.frame(attr(x, "labels"))
  lookup[[label_col]] <- row.names(lookup)
  names(lookup)[1] <- "num"
  lookup
}

reg_labels <- build_label_table(dat$Region, "region")
port_labels <- build_label_table(dat$Port, "port")

# Swap every code for the label stored against it in the lookup table; a code
# with no match in `num` becomes NA.
dat$Region <- reg_labels$region[match(dat$Region, reg_labels$num)]
dat$Port <- port_labels$port[match(dat$Port, port_labels$num)]


# Percentage of cities founded in each 200-year bin, split by port type.
dat_sum <- dat %>%
  filter(Founding >= -2000) %>%
  # (Founding + 3650) / 200 - 18 equals (Founding + 50) / 200, so each bin is
  # 200 years wide and begins 50 years before a multiple of 200.
  mutate(century = floor((Founding + 3650) / 200) - 18) %>%
  # Attach the number of cities in the bin to every row; it is the
  # denominator of the percentage below.
  add_count(century, name = "century_n") %>%
  group_by(century, Port) %>%
  summarize(pct = n() / century_n[1] * 100) %>%
  ungroup() %>%
  mutate(
    # Turn the bin index back into a calendar year.
    century = century * 200,
    Port = case_when(
      Port == 'N' ~ 'None',
      Port == 'OP' ~ 'Ocean',
      Port == 'RP' ~ 'River'
    )
  ) %>%
  data.frame()

# Line chart of the percentage of cities founded per period, one line type per
# port category, leaving out rows whose Port is 'None'.
p <- ggplot(subset(dat_sum, Port != 'None'), aes(x = century, y = pct)) +
  geom_line(aes(group = Port, linetype = Port)) +
  labs(y = "Percentage of Old Cities Founded",
       x = "Year") +
  theme(panel.background = element_blank(),
        axis.line = element_line(color = "black"),
        legend.key.size = unit(1, "cm"),
        legend.key = element_rect(fill = "white")) +
  scale_linetype_manual(name = "Port Type", values = c(1, 2)) +
  ylim(c(0, 100)) +
  # The x limits are set inside scale_x_continuous(): a separate xlim() call
  # adds its own x scale, and ggplot2 keeps only the last x scale added, so
  # limits given through xlim() before this scale would be discarded.
  scale_x_continuous(limits = c(-2000, 1800),
                     breaks = c(-2000, -1000, 0, 1000, 1800),
                     labels = c("2000 BCE", "1000 BCE", "0", "1000 CE", "1800 CE"))

# Write the figure to disk. ggsave()'s arguments are named in full instead of
# relying on `file` partially matching `filename` and on `p` falling into the
# `plot` slot by elimination.
# ggsave(filename = "figure_7_1.png", plot = p)
ggsave(filename = "output/figure_7_1.tiff", plot = p, dpi = 300)
